---
title: "The Gender Gap in Peace and Conflict Journals, 2000–2024"
subtitle: "Replication Code: Citation Analysis for II"
author:
  - Daina Chiba
  - Wakako Maekawa
date: today
format:
  html: 
    toc: true
    page-layout: full
    embed-resources: true
code-fold: false
execute:
  message: false
  warning: false
---

# Preparation

Install and load necessary packages.

```{r}
## Install pacman if not already installed
if (!require("pacman")) install.packages("pacman")

## Unload any loaded packages
pacman::p_unload(pacman::p_loaded(), character.only = TRUE)

## Delete everything in the memory
rm(list=ls(all=TRUE))

## Load packages
pacman::p_load(tidyverse, stargazer, MASS, ggeffects, knitr, sandwich, lmtest)
```

Define colors

```{r}
## Define colors
col_m <- "#5DC863FF"
col_f <- "#440154FF"
col_u <- rgb(205, 205, 205, maxColorValue = 255)
```

Load data

```{r}
df <- readRDS("data_citations_ii.rds")
```

```{r}
data.frame(
  All_male = sum(df $ All_male),
  All_female = sum(df $ All_female),
  Mixed_gender = sum(df $ Mixed_gender),
  Total = sum(df $ citation_num))
```

# Descriptive statistics

```{r}
summary(df $ per_female)
```

```{r}
summary(df $ citation_num)
```

```{r}
summary(df $ All_female)
```

```{r}
summary(df $ All_male)
```

```{r}
summary(df $ Mixed_gender)
```

Correlation between per_female and fem_topic

```{r}
cor.test(df$fem_topic, df$per_female)
```

# Regressions

We run negative binomial regression models where the DV is the number of citations by three different types of articles:

1. Citations by any articles
2. Citations by all-female-author articles
3. Citations by all-male-author articles
4. Citations by mixed-gender-author articles

To account for the age effect, whereby articles published earlier have had more time to accumulate citations, we include log (article age) and choose ones that fit the data better.

```{r}
# Any citations
mod1_os <- glm.nb(citation_num ~ per_female + num_author + fem_topic + 
                    offset(log(age)), data = df)
# Number of citations by all-female articles
mod2_os <- update(mod1_os, All_female ~ .)
# Number of citations by all-male articles
mod3_os <- update(mod1_os, All_male ~ .)
# Number of citations by mixed-gender articles
mod4_os <- update(mod1_os, Mixed_gender ~ .)
```

We obtain robust SEs to account for heteroskedasticity.

```{r}
se1_os <- sqrt(diag(vcovHC(mod1_os, type = "HC1")))
se2_os <- sqrt(diag(vcovHC(mod2_os, type = "HC1")))
se3_os <- sqrt(diag(vcovHC(mod3_os, type = "HC1")))
se4_os <- sqrt(diag(vcovHC(mod4_os, type = "HC1")))
modos_list <- list(mod1_os, mod2_os, mod3_os, mod4_os)
se_list <- list(se1_os, se2_os, se3_os, se4_os)
```


```{r}
#| warning: false
#| eval: false
#| echo: false

stargazer(modos_list, se = se_list,
          type = "text", 
          title = "Negative binomial models of citations",
          covariate.labels = c("Ratio of female authors", 
                               "Number of authors", 
                               "Feminine topics"),
          dep.var.labels = c("Times cited by", "Times cited by", 
                             "Times cited by", "Times cited by"),
          column.labels = c("any articles", "all-female", "all-male", "mixed-gender"),
          notes = c("All models include age of articles as offset"))
```

```{r}
#| warning: false
#| eval: false
#| echo: false

stargazer(modos_list, se = se_list,
          type = "latex", 
          title = "Negative binomial models of citations",
          covariate.labels = c("Ratio of female authors", 
                               "Number of authors", 
                               "Feminine topics"),
          dep.var.labels = c("Times cited by", "Times cited by", 
                             "Times cited by", "Times cited by"),
          column.labels = c("any articles", "all-female", "all-male", "mixed-gender"),
          notes = c("All models include age of articles as offset"))
```

## Table A1 in Online Appendix

```{r}
#| warning: false
#| results: asis

stargazer(modos_list, se = se_list,
          type = "html", 
          title = "Negative binomial models of citations",
          covariate.labels = c("Ratio of female authors", 
                               "Number of authors", 
                               "Feminine topics"),
          dep.var.labels = c("Times cited by", "Times cited by", 
                             "Times cited by", "Times cited by"),
          column.labels = c("any articles", "all-female", "all-male", "mixed-gender"),
          notes = c("All models include age of articles as offset"))
```


## Marginal effects

```{r}
df |> 
  summarise(
    mean_age = mean(age, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    mean_femTopic = mean(fem_topic, na.rm = TRUE),
    median_femTopic = median(fem_topic, na.rm = TRUE),
    mean_numAuthor = mean(num_author, na.rm = TRUE),
    median_numAuthor = median(num_author, na.rm = TRUE),
    mean_femRatio = mean(per_female),
    median_femRatio = median(per_female)
  ) |> 
  pivot_longer(everything(), 
               names_to = c(".value", "variable"), 
               names_sep = "_")
```

Illustrate the marginal effects for median values

```{r}
ggpredict(model = mod1_os, 
          condition = c(age = 10, fem_topic = 0.056, num_author = 2), 
          terms = c("per_female"), 
          vcov.fun = "vcovHC",
          vcov.type = "HC1")
```

Effect of female ratio on citation by all-female-authored articles

```{r}
predict_response(model = mod2_os, 
                 condition = c(age = 10, fem_topic = 0.056, num_author = 2), 
                 terms = c("per_female"))
```

Effect of female ratio on citation by all-male articles

```{r}
predict_response(model = mod3_os, 
                 condition = c(age = 10, fem_topic = 0.056, num_author = 2), 
                 terms = c("per_female"), 
                 vcov.fun = "vcovHC",
                 vcov.type = "HC1")
```

## Time interaction

```{r}
mod_int1 <- glm.nb(citation_num ~ per_female * (time + I(time^2) + I(time^3)) + 
                     num_author + fem_topic + offset(log(age)), data = df)
# Number of citations by all-female articles
mod_int2 <- update(mod_int1, All_female ~ .)
# Number of citations by all-male articles
mod_int3 <- update(mod_int1, All_male ~ .)
# Number of citations by mixed-gender articles
mod_int4 <- update(mod_int1, Mixed_gender ~ .)
```

We obtain robust SEs to account for heteroskedasticity.

```{r}
se_int1 <- sqrt(diag(vcovHC(mod_int1, type = "HC1")))
se_int2 <- sqrt(diag(vcovHC(mod_int2, type = "HC1")))
se_int3 <- sqrt(diag(vcovHC(mod_int3, type = "HC1")))
se_int4 <- sqrt(diag(vcovHC(mod_int4, type = "HC1")))
modint_list <- list(mod_int1, mod_int2, mod_int3, mod_int4)
se_list <- list(se_int1, se_int2, se_int3, se_int4)
```

```{r}
#| warning: false
#| eval: false
#| echo: false

stargazer(modint_list, se = se_list,
          column.labels = c("All","By all-female","By all-male","By mixed-gender"),
          type = "text")
```

```{r}
#| warning: false
#| eval: false
#| echo: false

stargazer(modint_list, se = se_list,
          column.labels = c("All","By all-female","By all-male","By mixed-gender"),
          type = "latex")
```

## Table A2 in Online Appendix

```{r}
#| warning: false
#| results: asis

stargazer(modint_list, se = se_list,
          column.labels = c("All","By all-female","By all-male","By mixed-gender"),
          type = "html")
```
